Contents

0.1 Load Packages

library(DESeq2)
library(tximport)
library(biomaRt)
library(tidyverse)
library(ggplot2)
library(ggrepel)
library(readxl)
library(Rtsne)
library(pheatmap)
library(IHW)
library(dplyr)
library(grid)
library(gridExtra)
library(jyluMisc)
library(gtable)

0.2 bioMart

ensembl.mart <- useEnsembl(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
annotations <-
getBM(
attributes = c(
'hgnc_symbol',
'refseq_mrna',
'transcript_biotype'
),
mart = ensembl.mart
)
annotations <- annotations %>% filter(hgnc_symbol != "" & refseq_mrna != "")

0.3 Read Counts from Salmon

files <- file.path("../data/alignments/", list.files("../data/alignments"),"quant.sf")
sample.ids <- list.files("../data/alignments") %>% map(function(x) gsub("_sequence", "", strsplit(x, split ="lane[[:digit:]]+")[[1]][2]))
names(files) <- sample.ids
tx2gene <- annotations %>% dplyr::select(refseq_mrna, hgnc_symbol)
txi.salmon <- tximport(files, type = "salmon", tx2gene = tx2gene, ignoreTxVersion = TRUE)

0.4 DESeq Dataset

sample.table <-
  data.frame(row.names = sample.ids,
  fileName = list.files("../data/alignments/"))
dds <- DESeqDataSetFromTximport(txi.salmon, sample.table, design = ~1)

0.5 Annotating Samples

sample.list <- read_xlsx("../data/metadata/sampleList.xlsx", sheet = 2)
plate <- sample.list$`Plate well number`
id <- sample.list$ID
sample.name<- sample.list$`Sample Name`
cell_line <- sample.name %>% map(function(x) strsplit(x, split = "_")[[1]][1])
cell_line <- unlist(cell_line)
treatment <- sample.name %>% map(function(x) strsplit(x, split = "_")[[1]][2])
treatment <- unlist(treatment)
treatment<- replace(treatment, treatment=="NTC-1-old", "NTC-1")
repl <- sample.name %>% map(function(x) strsplit(x, split = "_")[[1]][3])
repl <- unlist(repl)
sample.annotations <- data.frame(row.names = id, cell_line = as.character(cell_line), treatment = as.character(treatment), replicate = repl, plate = plate)
write.csv(sample.annotations, file = "../data/metadata/sample_annotations.csv", row.names = T)
colData(dds) <- cbind(colData(dds), sample.annotations[rownames(colData(dds)),])
dds$lane <- c(rep("2",30), rep("3",30))

0.6 Annotating Transcripts

gene.annotations <- annotations[match(rownames(dds), annotations$hgnc_symbol),]
rownames(gene.annotations) <- rownames(dds)
rowData(dds) <- gene.annotations
save(dds, file = "../data/RData/3RNAseq.RData")

0.7 GGM on Expression Matirix

sample.ids <- sample.ids[dds$treatment %in% c("AK2-3D", "NTC-1")]
dds <- dds[,dds$treatment %in% c("AK2-3D", "NTC-1")]

dds$treatment <- droplevels(dds$treatment)
dds$cell_line <- droplevels(dds$cell_line)
dds <- estimateSizeFactors(dds)
dds <- dds[which(rowSums(counts(dds)) > 50),]
# dds.vst <- varianceStabilizingTransformation(dds, blind = TRUE)
dds.vst <- dds
exprMat <- assay(dds.vst)
sds <- rowSds(exprMat)
exprMat <- exprMat[order(sds, decreasing = TRUE)[1:5000],]
g <- Questools::ggm(data.frame(exprMat), rho = 0.5)

g$network

0.8 PCA on Expression Matirix

pcRes <- prcomp(t(exprMat), scale. = FALSE, center = TRUE)
eigs <- pcRes$sdev^2
eigs <- eigs/sum(eigs)

plotTab <- data.frame(pcRes$x[,c(1,2)])
plotTab$sampleID <- rownames(plotTab)
plotTab$cellLine <- sample.annotations[plotTab$sampleID, "cell_line"]
plotTab$treatment <- sample.annotations[plotTab$sampleID, "treatment"]

ggplot(plotTab, aes(x=PC1, y = PC2, label = sampleID, color = cellLine, shape = treatment)) +
  geom_point() +
  geom_text_repel(aes(PC1,PC2, label = sampleID)) +
  theme_bw()

0.9 t-SNE on Expression Matirix

tsneRun <- function(distMat,perplexity=10,theta=0,max_iter=5000) {
  tsneRes <- Rtsne(distMat, perplexity = perplexity, theta = theta, 
                   max_iter = max_iter, is_distance = TRUE, dims =2)
  tsneRes <- tsneRes$Y
  rownames(tsneRes) <- labels(distMat)
  colnames(tsneRes) <- c("x","y")
  tsneRes
}

distViab <- dist(t(exprMat))

plotTab <- data.frame(tsneRun(distViab, perplexity = 2, max_iter = 10000))

#plot
plotTab$sampleID <- rownames(plotTab)
plotTab$cellLine <- sample.annotations[plotTab$sampleID, "cell_line"]
plotTab$treatment <- sample.annotations[plotTab$sampleID, "treatment"]

ggplot(plotTab, aes(x=x, y = y, label = sampleID, color = cellLine, shape = treatment)) +
  geom_point() +
  geom_text_repel(aes(x,y, label = sampleID)) +
  theme_bw()

0.10 Sample Similarities

colAnno <- data.frame(plotTab[,c("cellLine", "treatment")])
corMat <- cor(exprMat)
pheatmap(corMat, annotation_col = colAnno)

0.11 Differential Expressed Genes between Treatments

dds$id <- unlist(sample.ids)
dds$cell_line <- sample.annotations[dds$id, "cell_line"]
dds$treatment <- sample.annotations[dds$id, "treatment"]
dds$treatment <- droplevels(dds$treatment)
dds$cell_line <- droplevels(dds$cell_line)
design(dds) <- ~ cell_line

# ddsCell <- DESeq(dds)
# results.cell <- results(ddsCell, contrast = c("cell_line" , "Raji", "Namalwa"))
# results.df <- as.data.frame(results.cell)
# summary(results.df)
# res.05 <- results(ddsCell, alpha = 0.05)
# table(res.05$padj < 0.05)
# resIHW <- results(ddsCell, filterFun=ihw)
# summary(resIHW)
results.cell <- list()
dds$lane <- factor(dds$lane)
dds.cells <- unique(dds$cell_line[dds$treatment %in% c("NTC-1", "AK2-3D")]) %>% map(function(x) dds[,dds$cell_line == x]) %>% map(function(x) x[apply(counts(x), 1, function(x) all(x > 10)),])

dl <- function(x){
  x$treatment <- droplevels(x$treatment)
  x$lane <- droplevels(x$lane)
  if(length(levels(x$lane)) > 1){
    design(x) <- ~treatment + lane
  }else{
    design(x) <- ~treatment
  }
  x
}
dds.cells <- dds.cells %>% map(dl)
results.cell <- dds.cells %>% map(function(x) DESeq(x, betaPrior = T)) %>% map(function(x) results(x, contrast = c("treatment", "NTC-1", "AK2-3D")))
names(results.cell) <- unique(dds$cell_line[dds$treatment %in% c("NTC-1", "AK2-3D")])
null <- names(results.cell) %>% map(function(x) write.csv(results.cell[[x]], paste0("../results/DEG_cell_line/", x, ".csv")))
top.genes <- results.cell %>% map(function(df) rownames(df[df$pvalue < 0.05 & abs(df$log2FoldChange)>0.5,]))

0.12 P-Value Distributions for Treatment

plotList <- list()
results.cell %>% map(function(x) tibble(pval = x$pvalue)) %>% map(function(x) ggplot(x, aes(pval)) + geom_histogram(bins = 20, fill = "blue", col="blue", alpha=0.3) +
      theme_bw() + theme(plot.title = element_text(hjust =0.5)) +
      xlab("raw p values")) -> plots

plotList<- 1:length(plots) %>% map(function(x) plots[[x]] + ggtitle(sprintf("NTC-1 vs AK2-3D in %s", names(results.cell)[x])))
names(plotList) <- names(results.cell)
grid.arrange(grobs= plotList, ncol = 2)

0.13 Enrichment Analysis for Different Treatments

createInput <- function(DEres, pCut = 0.05, ifFDR = FALSE, rankBy = "stat") {
  if (ifFDR) {
    inputTab <- data.frame(DEres) %>% rownames_to_column(var = "symbol") %>% filter(padj <= pCut)
  } else {
    inputTab <- data.frame(DEres) %>% rownames_to_column(var = "symbol") %>%filter(pvalue <= pCut)
  }

  inputTab <- arrange(inputTab, pvalue) %>% filter(!duplicated(symbol)) %>% select_("symbol", rankBy) %>% data.frame(stringsAsFactors = FALSE)
  rownames(inputTab) <- inputTab$symbol
  inputTab$symbol <- NULL
  colnames(inputTab) <- "stat"
  return(inputTab)
}

gmts <- list(H=system.file("extdata","h.all.v5.1.symbols.gmt",package="BloodCancerMultiOmics2017"),
            C6=system.file("extdata","c6.all.v5.1.symbols.gmt", package="BloodCancerMultiOmics2017"),
            KEGG=system.file("extdata","c2.cp.kegg.v5.1.symbols.gmt", package = "BloodCancerMultiOmics2017"))


results.cell %>% map(function(x) createInput(x, pCut = 0.1, ifFDR = FALSE)) %>% map(function(x) names(gmts) %>% map(function(y) runGSEA(inputTab =x, gmtFile = gmts[[y]], GSAmethod = "page"))) -> p

p %>% map(function(x) x[[1]]) -> res.Halmark
p %>% map(function(x) x[[2]]) -> res.Oncogenetic
p %>% map(function(x) x[[3]]) -> res.KEGG
enBar.Halmark <- plotEnrichmentBar(res.Halmark, pCut = 0.05, ifFDR = FALSE, setName = "HALLMARK")
plot(enBar.Halmark)

enBar.Oncogenetic <- plotEnrichmentBar(res.Oncogenetic, pCut = 0.05, ifFDR = FALSE, setName = "Oncogenetic")
plot(enBar.Oncogenetic)

enBar.KEGG <- plotEnrichmentBar(res.KEGG, pCut = 0.05, ifFDR = FALSE, setName = "KEGG")
plot(enBar.KEGG)

0.14 P-Value Distributions for all Cell Lines

dds <- dds[,dds$treatment %in% c("NTC-1", "AK2-3D")]
dds$treatment <- droplevels(dds$treatment)
dds$cell_line <- droplevels(dds$cell_line)
design(dds) <- ~ cell_line + treatment
dds <- DESeq(dds)
res.all <- results(dds, contrast = c("treatment", "NTC-1", "AK2-3D"))
plotTab <- tibble(pval = res.all$pvalue)
p <- ggplot(plotTab, aes(pval)) + geom_histogram(bins = 20, fill = "blue", col="blue", alpha=0.3) +
  theme_bw() + theme(plot.title = element_text(hjust =0.5)) +
  ggtitle("NTC-1 vs AK2-3D in all cell lines") +
  xlab("raw p values")
p

0.15 Enrichment Analysis for Different Treatments for all Cell Lines

inputTab <- createInput(res.all, pCut = .1, ifFDR = F)
enRes <- runGSEA(inputTab = inputTab, gmtFile = gmts$H, GSAmethod = "page")
enRes$Name <- gsub("HALLMARK_","", enRes$Name)
enRes <- list(enRes)
names(enRes) <- "NTC-1 vs AK2-3D"
enBar <- plotEnrichmentBar(enRes, pCut = 0.1, ifFDR = FALSE, setName = "HALLMARK")
plot(enBar)

inputTab <- createInput(res.all, pCut = .1, ifFDR = F)
enRes <- runGSEA(inputTab = inputTab, gmtFile = gmts$C6, GSAmethod = "page")
enRes$Name <- gsub("Oncogenetic_","", enRes$Name)
enRes <- list(enRes)
names(enRes) <- "NTC-1 vs AK2-3D"
enBar <- plotEnrichmentBar(enRes, pCut = 0.1, ifFDR = FALSE, setName = "Oncogenetic")
plot(enBar)

inputTab <- createInput(res.all, pCut = .1, ifFDR = F)
enRes <- runGSEA(inputTab = inputTab, gmtFile = gmts$C6, GSAmethod = "page")
enRes$Name <- gsub("KEGG_","", enRes$Name)
enRes <- list(enRes)
names(enRes) <- "NTC-1 vs AK2-3D"
enBar <- plotEnrichmentBar(enRes, pCut = .1, ifFDR = FALSE, setName = "KEGG")
plot(enBar)

hv.genes <- function(df, n = 500){
  sds <- apply(df, 1, sd)
  means <- apply(df, 1, mean)
  sds <- unlist(sds)
  means <- unlist(means)
  lm <- lm(sds~means)
  sm <- summary(lm)
  res <- residuals(lm)
  names(res) <- 1:length(res)
  down <- tail(order(res), n)
  down
}

hv.genes <- function(df, n = 500){
  sds <- apply(df, 1, sd)
  down <- head(order(sds), n)
  down
}

cell_lines <- unique(dds$cell_line)
heat.map <- function(cell_line){
  line.indices <- which(dds$cell_line == cell_line)
  colAnno <- data.frame(dds$treatment[line.indices])
  rownames(colAnno) <- colnames(dds)[which(dds$cell_line == cell_line)]
  print(colAnno)
  to.plot <- exprMat[hv.genes(exprMat),]
  to.plot <- t(scale(t(to.plot)))
  pheatmap(to.plot[,line.indices], annotation_col  = colAnno, main = cell_line, show_rownames = F)
}
cell_lines %>% map(heat.map)
##     dds.treatment.line.indices.
## K10                      AK2-3D
## K11                       NTC-1
## K12                      AK2-3D
## K7                        NTC-1
## K8                       AK2-3D
## K9                        NTC-1

##     dds.treatment.line.indices.
## K13                       NTC-1
## K14                      AK2-3D
## K15                       NTC-1
## K16                      AK2-3D
## K17                       NTC-1
## K18                      AK2-3D

##    dds.treatment.line.indices.
## K1                       NTC-1
## K2                      AK2-3D
## K3                       NTC-1
## K4                      AK2-3D
## K5                       NTC-1
## K6                      AK2-3D

##     dds.treatment.line.indices.
## K43                       NTC-1
## K44                      AK2-3D
## K45                       NTC-1
## K46                      AK2-3D
## K47                       NTC-1
## K48                      AK2-3D
## K49                       NTC-1
## K50                      AK2-3D
## K51                       NTC-1
## K52                      AK2-3D

##     dds.treatment.line.indices.
## K19                       NTC-1
## K20                      AK2-3D
## K21                       NTC-1
## K22                      AK2-3D
## K23                       NTC-1
## K24                      AK2-3D

##     dds.treatment.line.indices.
## K25                       NTC-1
## K26                      AK2-3D
## K27                       NTC-1
## K28                      AK2-3D
## K29                       NTC-1
## K30                      AK2-3D

##     dds.treatment.line.indices.
## K31                       NTC-1
## K32                      AK2-3D
## K33                       NTC-1
## K34                      AK2-3D
## K35                       NTC-1
## K36                      AK2-3D

##     dds.treatment.line.indices.
## K37                       NTC-1
## K38                      AK2-3D
## K39                       NTC-1
## K40                      AK2-3D
## K41                       NTC-1
## K42                      AK2-3D

## [[1]]
## 
## [[2]]
## 
## [[3]]
## 
## [[4]]
## 
## [[5]]
## 
## [[6]]
## 
## [[7]]
## 
## [[8]]
pointer <- function(cell_line) {
  line.indices <- which(dds$cell_line == cell_line)
  colAnno <- dds$treatment[line.indices]
  to.plot <- exprMat["AK2", line.indices]
  df <- data.frame(y = to.plot, x = factor(colAnno))
  print(df)
  ggplot(df , aes(x, y,
  fill = factor(colAnno),
  col = factor(colAnno)
  )) + geom_point() + labs(title = cell_line)
}
cell_lines %>% map(pointer)
##       y      x
## K10 247 AK2-3D
## K11 783  NTC-1
## K12 377 AK2-3D
## K7  362  NTC-1
## K8  303 AK2-3D
## K9  376  NTC-1
##       y      x
## K13 244  NTC-1
## K14 264 AK2-3D
## K15 266  NTC-1
## K16 239 AK2-3D
## K17 479  NTC-1
## K18 338 AK2-3D
##      y      x
## K1 198  NTC-1
## K2  88 AK2-3D
## K3 302  NTC-1
## K4  75 AK2-3D
## K5 199  NTC-1
## K6  55 AK2-3D
##       y      x
## K43 385  NTC-1
## K44 198 AK2-3D
## K45 307  NTC-1
## K46  86 AK2-3D
## K47 241  NTC-1
## K48 135 AK2-3D
## K49 246  NTC-1
## K50 251 AK2-3D
## K51 139  NTC-1
## K52 134 AK2-3D
##       y      x
## K19 502  NTC-1
## K20 222 AK2-3D
## K21 473  NTC-1
## K22 317 AK2-3D
## K23 441  NTC-1
## K24 205 AK2-3D
##       y      x
## K25 408  NTC-1
## K26 268 AK2-3D
## K27 381  NTC-1
## K28 264 AK2-3D
## K29 594  NTC-1
## K30 316 AK2-3D
##       y      x
## K31 708  NTC-1
## K32 640 AK2-3D
## K33 560  NTC-1
## K34 622 AK2-3D
## K35 667  NTC-1
## K36 499 AK2-3D
##       y      x
## K37 237  NTC-1
## K38 217 AK2-3D
## K39 643  NTC-1
## K40 446 AK2-3D
## K41 597  NTC-1
## K42 400 AK2-3D
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

library(Vennerable)
vennList<- Venn(top.genes[c("Z138", "Nomo1", "BL60", "THP1")])
gVenn <- compute.Venn(vennList, doWeights  = TRUE)
plot(vennList, type = "ellipses")
## Warning in compute.E4(V, doWeights): Cant do a weighted E4